****************************************************************************************************************************
Created: 16 SEP 2014

Purpose: Add outcome variables
		
****************************************************************************************************************************;


********************************
Outcome: 
High school enrollment
Vocational enrollment
Higher education enrollment
********************************;

/* Step 1: fetch education information - the ongoing (current) education per year. */
Libname raw1 'F:\Rawdata\702727\data201108';
Libname raw2 'F:\Rawdata\702727\data201204';
Libname raw3 'F:\Rawdata\702727\data201302';


/*
 Macro udd(start, slut)
 For every year from &start to &slut (inclusive) creates WORK.udd<year> from
 raw1.tot_udda<year>, keeping pnr and igudd and adding the constant year=<year>.
   start - first year to read
   slut  - last year to read
*/
%macro udd(start,slut);
%local i;	/* loop counter stays local so an outer macro variable i is never overwritten */
%do i=&start %to &slut;

data udd&i;
set raw1.tot_udda&i (keep = pnr igudd);
year=&i;
run;		/* close every generated step explicitly - the last one no longer waits for a later step boundary */

%end; 
%mend;
%udd(1980,2009);

/* Extracts for 2010-2012: the source column is named udd in these files and is renamed to igudd. */
data udd2010;
    set raw2.tot_udda2010 (rename=(udd=igudd) keep=pnr udd);
    year = 2010;
run;

data udd2011;
    set raw3.tot_udda2011 (rename=(udd=igudd) keep=pnr udd);
    year = 2011;
run;

data udd2012;
    set raw3.tot_udda2012 (rename=(udd=igudd) keep=pnr udd);
    year = 2012;
run;

/* Stack the yearly files udd1980, udd1981, ..., udd2012 (in that order) into one long table. */
data uddannelse;
    set udd1980-udd2012;
run;


/* Fetch the education format (lookup) table. */
Libname formater '\\SRVFSENAS3\formater\SAS formater i Danmarks Statistik\SAS_datasaet\uddannelser';


/* Lookup table: start becomes igudd (declared as $4 ahead of the SET), udd2011_l1l2_k becomes igfsp. */
data udd_fsp;
    length igudd $ 4;
    set formater.c_udd2011_l1l2_k (rename=(start=igudd udd2011_l1l2_k=igfsp)
                                   keep=start udd2011_l1l2_k);
run;

proc sort data=udd_fsp;
    by igudd;
run;


proc sort data=uddannelse;
    by igudd;
run;



/* Keep every education row and attach igfsp wherever igudd is found in the lookup. */
data uddannelse1;
    merge uddannelse (in=in_udd) udd_fsp;
    by igudd;
    if in_udd;
    keep pnr igfsp igudd year;
run;


/* Fetch the NICU data that the education records are linked to. */
libname in1 'K:\Data\Workdata\702727\xru\NICU project\Datasæt';

data nicu;
    set in1.nicu_samlet (keep = pnr fodtdato birthyear);
run;

/* Birth-year distribution before the cohort restriction below. */
proc freq data=nicu;
    tables birthyear;
run;


/* Keep birth years 1970 and later. A missing birthyear compares low and is dropped as well. */
data nicu;
    set nicu;
    if birthyear >= 1970;
run;

proc sort data=nicu;
    by pnr;
run;

proc sort data=uddannelse1;
    by pnr;
run;

/* Every NICU person is kept; persons without education rows get missing education fields. */
data uddannelse2;
    merge nicu (in=in_nicu) uddannelse1;
    by pnr;
    if in_nicu;
run;

/*
 Derive the short education code:
   igfsp_kort     - first four characters of igfsp
   igfsp_kort_num - the same code as a number
 The numeric version is produced with an explicit INPUT() instead of multiplying
 the character value by 1, so the step no longer depends on implicit
 character-to-numeric conversion. The ?? modifier keeps a non-numeric code from
 raising _ERROR_; such a code simply becomes missing.
*/
data uddannelse2;
set uddannelse2;

igfsp_kort=substr(igfsp,1,4);

igfsp_kort_num=input(igfsp_kort, ?? 4.);
run;

proc freq data=uddannelse2;
tables igfsp_kort;
run;


/*
 Build education categories from the short igfsp code.
   udd     - character label
   udd_num - numeric code: 0 none, 1 kl1_6, 2 kl7_10, 3 AMU, 4 hojskole,
             5 gym, 6 erhverv, 7 videre

 Changes compared with the earlier version of this step:
   - igfsp_kort_num is numeric and is now compared with numeric constants.
     It was previously compared with quoted strings, which relies on implicit
     type conversion and may prevent any match - NOTE(review): confirm against an old log.
   - The character and numeric code lists are aligned: 501 (kl1_6) and 5010 (videre)
     now appear in both. Each was previously present in only one of the two lists.
     NOTE(review): confirm that both codes belong to these categories.
   - The duplicated 4025 entry is removed.
*/
data uddannelse2;
set uddannelse2;

/* Character label. Initialised to 8 blanks so the longest label (hojskole) fits. */
udd ="        ";
if igfsp_kort in ("1002", " 501") then udd="kl1_6";
if igfsp_kort in ("1003") then udd="kl7_10";
if igfsp_kort in ("1511") then udd="AMU";
if igfsp_kort in ("1510") then udd="hojskole";
if igfsp_kort in ("2015", "2016", "2017", "2018", "2539", "2551") then udd="gym";
if igfsp_kort in ("3039", "3040", "3042", "3044", "3046", "3048", "3049", "3050", "3520", "3539", "3553", "3554", "3555", "3558", "3560",
	"3575", "3580", "3585", "3590") then udd="erhverv";
if igfsp_kort in ("4025", "4039", "4059", "4090", "5010", "5020", "5030", "5039", "5059", "5090", "6025", "6030", "6035", "6039",
	"6059", "6080", "6090") then udd="videre";
if igfsp_kort="" then udd="ingen";

/* Numeric version of the same categories. */
udd_num =.;
if igfsp_kort_num in (1002, 501) then udd_num=1;	/* kl1_6 */
if igfsp_kort_num in (1003) then udd_num=2;	/* kl7_10 */
if igfsp_kort_num in (1511) then udd_num=3;	/* AMU */
if igfsp_kort_num in (1510) then udd_num=4;	/* hojskole */
if igfsp_kort_num in (2015, 2016, 2017, 2018, 2539, 2551) then udd_num=5;	/* gym */
if igfsp_kort_num in (3039, 3040, 3042, 3044, 3046, 3048, 3049, 3050, 3520, 3539, 3553, 3554, 3555, 3558, 3560,
	3575, 3580, 3585, 3590) then udd_num=6;	/* erhverv */
if igfsp_kort_num in (4025, 4039, 4059, 4090, 5010, 5020, 5030, 5039, 5059, 5090, 6025, 6030, 6035, 6039,
	6059, 6080, 6090) then udd_num=7;	/* videre */
if missing(igfsp_kort_num) then udd_num=0;	/* none */

run;

/*
 Spread the education code over age-specific columns: udd_<age> receives udd_num
 on the row whose year equals birthyear + <age>, for ages 14 to 20.
*/
data uddannelse3;
    set uddannelse2;

    array udd_at{14:20} udd_14 udd_15 udd_16 udd_17 udd_18 udd_19 udd_20;
    do _age = 14 to 20;
        if year = birthyear + _age then udd_at{_age} = udd_num;
    end;

    drop _age year igfsp_kort_num udd_num;
run;

proc sort data=uddannelse3;
    by pnr;
run;

/*
 Collapse to one row per pnr. With no VAR statement every numeric variable is
 summarised, and the minimum over the rows of a person is kept.
 NOTE(review): if a person has several rows for the same year, the lowest code is kept - confirm this is intended.
*/
proc means data=uddannelse3 nway noprint;
    class pnr;
    output out=uddannelse4 min=;
run;

proc freq data=uddannelse4;
    tables udd_14 udd_15 udd_16 udd_17 udd_18 udd_19 udd_20;
run;

/* At age 14 everybody is in primary school, */
/* and nobody is in higher education before turning 18. */

data uddannelse4;
    set uddannelse4;

    /* 1 when code 5 is recorded at any age from 14 to 19, otherwise 0 */
    hs_enrollment = (udd_14=5 or udd_15=5 or udd_16=5 or udd_17=5 or udd_18=5 or udd_19=5);

/*
voc_enrollment=0;
if udd_14=6 or udd_15=6 or udd_16=6 or udd_17=6 or udd_18=6 or udd_19=6 or udd_20=6  then voc_enrollment=1;
higher_enrollment=0;
if udd_14=7 or udd_15=7 or udd_16=7 or udd_17=7 or udd_18=7 or udd_19=7 or udd_20=7  then higher_enrollment=1;
*/
run;


proc freq data=uddannelse4;
    tables hs_enrollment;
run;


/* Sort in place. Only pnr and hs_enrollment remain in uddannelse4 afterwards. */
proc sort data=uddannelse4 (keep = pnr hs_enrollment);
    by pnr;
run;


/* The enrollment outcome can now be combined with the full sibling dataset. */

/* Copy of the full NICU file, ordered by pnr. */
proc sort data=in1.nicu_samlet out=data;
    by pnr;
run;

/* All rows of the full file are kept; hs_enrollment is attached where pnr matches. */
data data1;
    merge data (in=in_all) uddannelse4;
    by pnr;
    if in_all;
run;



/*
 ****************************************************************************
 Purpose: add crime as an outcome - without traffic.
 ****************************************************************************
*/

libname crime "K:\data\workdata\702727\crime\Data";

/* Fetch the crime data (created by Tine). */
data crimes;
    set crime.crimes (keep = pnr scridto violence property othercrime special traffic penallaw safgdto straf bode warning allcrime debut_y year);
run;

proc sort data=crimes;
    by pnr;
run;

proc freq data=crimes;
    tables year;
run;


/* Fetch the sibling data that the crime records are linked to. */

/*
 Only birth years 1970-2000 are kept. The youngest observations are dropped to save
 capacity, as they cannot yet have committed a crime. A missing birthyear is dropped too.
*/
data data1a;
    set data1 (keep = pnr birthyear);
    if 1970 <= birthyear <= 2000;
run;

proc sort data=data1a;
    by pnr;
run;

/* Merge the children with the crime data. Every child is kept, with or without crime rows. */
data crimes1;
    merge data1a (in=in_child) crimes;
    by pnr;
    if in_child;
run;

proc sort data=crimes1;
    by pnr year;
run;

proc freq data=crimes1;
    tables violence property othercrime special traffic;
run;

/* Remove rows flagged as traffic offences. */
data crimes1a;
    set crimes1;
    if traffic ne 1;
run;


proc freq data=crimes1a;
    tables violence property othercrime special traffic;
run;

/* Compute the age at each crime: year minus birthyear. */

data crimes2;
    set crimes1a;

    alder_crime = year - birthyear;

    /* Type-specific ages: filled only on rows where the corresponding flag equals 1 */
    if violence = 1   then alder_violence   = year - birthyear; else alder_violence   = .;
    if property = 1   then alder_property   = year - birthyear; else alder_property   = .;
    if othercrime = 1 then alder_othercrime = year - birthyear; else alder_othercrime = .;
    if special = 1    then alder_special    = year - birthyear; else alder_special    = .;
run;

proc sort data=crimes2;
    by pnr year;
run;



/* Age at imprisonment: filled on rows where straf equals 1. */
data crimes2;
    set crimes2;
    if straf = 1 then alder_prison = year - birthyear;
run;

proc freq data=crimes2;
    tables alder_crime alder_prison;
run;

/*
 Delete the observation when the crime was committed before age 15.
 A row is removed as soon as any of the age variables is non-missing and below 15.
*/
data crimes3;
    set crimes2;

    array ages{*} alder_crime alder_violence alder_property alder_othercrime alder_special alder_prison;
    do _k = 1 to dim(ages);
        if ages{_k} < 15 and ages{_k} ^= . then delete;
    end;

    drop _k;
run;



/*
 Macro crimealder(start, slut)
 For every age from &start to &slut (inclusive) creates WORK.crime<age> holding the
 rows of crimes3 with alder_crime = <age>, adds the flag crime<age> = 1 and sorts by pnr.
   start - first age
   slut  - last age
*/
%macro crimealder(start,slut);
%local i;	/* loop counter stays local so an outer macro variable i is never overwritten */
%do i=&start %to &slut;

data crime&i;
set crimes3 (keep = pnr birthyear alder_crime);
if alder_crime=&i;

crime&i=1;
run;

proc sort data=crime&i;
by pnr;
run;
%end;
%mend;
%crimealder(15,21);



/*
 Macro first(start, slut)
 Reduces each WORK.crime<age> dataset (ages &start to &slut) to the first row per pnr.
 The datasets must already be sorted by pnr.
*/
%macro first(start,slut);
%local i;	/* loop counter stays local so an outer macro variable i is never overwritten */
%do i=&start %to &slut;

data crime&i;
set crime&i;
by pnr;
if first.pnr;
 
run;
%end;
%mend;
%first(15,21);


/*
 Combine the age-specific files: one row per pnr with a crime at any age 15-21.
 NOTE(review): birthyear and alder_crime are carried along from the crime<age> files.
 After this merge alder_crime holds the value from the last contributing file only.
*/
data crime_all;
merge crime15-crime21;
by pnr;
run;


/* Attach the flags to the full sample. All rows of data1 are kept. */
data data2;
merge data1 (in=a) crime_all;
by pnr;
if a;
run;

/*
 Turn a missing flag into 0 (no crime at that age), but only for persons who were
 part of the crime linkage.
   Lower bound 1970: the crime merge earlier in this file was restricted to birth
     years 1970-2000, so persons born before 1970 were never matched to the crime
     data and must not receive an unfounded 0. The bound also excludes a missing
     birthyear, which compares lower than any number in SAS and would otherwise
     pass the upper-bound test.
   Upper bound 2012 - age: presumably the last cohort that reaches the age within
     the period covered by the crime data - TODO confirm.
*/
data data3;
set data2;

if crime15=. and 1970<=birthyear<1997 then crime15=0;
if crime16=. and 1970<=birthyear<1996 then crime16=0;
if crime17=. and 1970<=birthyear<1995 then crime17=0;
if crime18=. and 1970<=birthyear<1994 then crime18=0;
if crime19=. and 1970<=birthyear<1993 then crime19=0;
if crime20=. and 1970<=birthyear<1992 then crime20=0;
if crime21=. and 1970<=birthyear<1991 then crime21=0;

run;


/* Summary statistics for all variables whose name starts with crime. */
proc means data=data3;
var crime:;
run;






/*
 ****************************************************************************
 Purpose: add parenthood as an outcome.
 ****************************************************************************
*/


/* Fetch the NICU data. */
data person;
    set in1.nicu_samlet (keep = pnr fodtdato birthyear male);
run;

proc freq data=person;
    tables birthyear;
run;


/*
 Parenthood is measured at age 20.
 Birth information is only available up to and including 2010,
 so only individuals born in 1990 or earlier are kept
 (rows with a missing birthyear also stay).
 year20 is the calendar year in which the person turns 20.
*/
data person1;
    set person;
    if birthyear > 1990 then delete;
    year20 = birthyear + 20;
run;

proc print data=person1 (obs=100);
    var birthyear year20;
run;


/*
 Read fodsel_10, which shows which persons have become parents.
 NOTE(review): this statement re-points libref raw3, which earlier in this file
 referred to the data201302 folder. Re-running earlier steps after this point will
 read from data201301 instead.
*/

Libname raw3 'F:\Rawdata\702727\data201301';

/*
 One row per birth, with the birth year of the child.
 Births before 1990, and rows where the year cannot be derived, are removed.
 NOTE(review): births before 1990 are dropped, so a first child born earlier is not seen - confirm this is acceptable.
*/
data fodsel;
    set raw3.fodsel_10 (keep = pnr pnrm pnrf FODTDATO);
    fodselsaar_barnFB = year(FODTDATO);
    if fodselsaar_barnFB < 1990 then delete;
run;

proc freq data=fodsel;
    tables fodselsaar_barnFB;
run;


/* ---------- Fathers first ---------- */

/* Births with a father identifier. */
data fodsel_far;
    set fodsel;
    if pnrf ne .;
run;

proc sort data=fodsel_far;
    by pnrf fodselsaar_barnFB;
run;

/* Keep only the first row per pnrf, which after the sort is the earliest child birth year. */
data fodsel_far1;
    set fodsel_far (drop=pnr pnrm FODTDATO);
    by pnrf;
    if first.pnrf;
run;

/* Rename the father identifier to pnr so it can be matched to the person file. */
data fodsel_far2;
    set fodsel_far1 (rename=(pnrf=pnr));
run;

/* Merge the fatherhood information onto the person data. */
proc sort data=fodsel_far2;
    by pnr;
run;

/* Persons with male = 0 are removed. */
data person1_far;
    set person1;
    if male ne 0;
run;

proc sort data=person1_far;
    by pnr;
run;

/* Every remaining person is kept; the first-child year is attached where available. */
data fatherhood;
    merge person1_far (in=in_person) fodsel_far2;
    by pnr;
    if in_person;
run;


/*
 Indicator for having had a child by age 20:
 fatherhood20 = 1 when a first-child year exists and is not later than year20, otherwise 0.
*/
data fatherhood;
    set fatherhood;
    fatherhood20 = (fodselsaar_barnFB <= year20 and fodselsaar_barnFB ^= .);
run;

/* Sort in place. Only pnr and fatherhood20 remain in fatherhood afterwards. */
proc sort data=fatherhood (keep= pnr fatherhood20);
    by pnr;
run;


/* Combine with the full sibling dataset. All rows of data3 are kept. */
proc sort data=data3;
    by pnr;
run;

data data4;
    merge data3 (in=in_main) fatherhood;
    by pnr;
    if in_main;
run;



/* ---------- And now for mothers ---------- */

/* Births with a mother identifier. */
data fodsel_mor;
    set fodsel;
    if pnrm ne .;
run;

proc sort data=fodsel_mor;
    by pnrm fodselsaar_barnFB;
run;

/* Keep only the first row per pnrm, which after the sort is the earliest child birth year. */
data fodsel_mor1;
    set fodsel_mor (drop=pnr pnrf FODTDATO);
    by pnrm;
    if first.pnrm;
run;

/* Rename the mother identifier to pnr so it can be matched to the person file. */
data fodsel_mor2;
    set fodsel_mor1 (rename=(pnrm=pnr));
run;

/* Merge the motherhood information onto the person data. */
proc sort data=fodsel_mor2;
    by pnr;
run;

/* Persons with male = 1 are removed. */
data person1_mor;
    set person1;
    if male ne 1;
run;

proc sort data=person1_mor;
    by pnr;
run;

/* Every remaining person is kept; the first-child year is attached where available. */
data motherhood;
    merge person1_mor (in=in_person) fodsel_mor2;
    by pnr;
    if in_person;
run;


/*
 Indicator for having had a child by age 20:
 motherhood20 = 1 when a first-child year exists and is not later than year20, otherwise 0.
*/
data motherhood;
    set motherhood;
    motherhood20 = (fodselsaar_barnFB <= year20 and fodselsaar_barnFB ^= .);
run;

/* Sort in place. Only pnr and motherhood20 remain in motherhood afterwards. */
proc sort data=motherhood (keep= pnr motherhood20);
    by pnr;
run;


/* Combine with the full sibling dataset. All rows of data4 are kept. */
proc sort data=data4;
    by pnr;
run;

data data5;
    merge data4 (in=in_main) motherhood;
    by pnr;
    if in_main;
run;

/*
 parenthood20 = 1 when either fatherhood20 or motherhood20 equals 1, otherwise 0.
 It is set to missing for persons born after 1990.
*/
data data5;
    set data5;
    parenthood20 = (fatherhood20=1 or motherhood20=1);
    if birthyear > 1990 then parenthood20 = .;
run;




/*
 Set outcomes to missing when the individual's cohort is outside the relevant cohorts.
 E.g. for primary-school outcomes the relevant cohorts are 1986-1997.
 A few persons born in 84 or from 98 onwards have a grade, but they were either
 very young or very old compared with the norm.
*/

data data6;
    set data5;

    /* Primary-school outcomes: cohorts 1986-1997 */
    if birthyear<1986 or birthyear>1997 then do;
        karakter1 = .;
        dansk_m_prove1 = .;
        mat_s_prove1 = .;
    end;

    /* High school enrollment: cohorts 1973-1993 */
    if birthyear<1973 or birthyear>1993 then hs_enrollment = .;

    /* Parenthood by age 20: cohorts 1973-1990 */
    if birthyear<1973 or birthyear>1990 then parenthood20 = .;
run;


/* Save the dataset. */

/*
*STATA dataset;
proc export data=data6 outfile="K:\Data\Workdata\702727\xru\NICU project\Datasæt\nicu_outcomes.dta" dbms=stata replace;
run;
*/

/* Permanent copy of the final analysis file. */
libname out 'K:\Data\Workdata\702727\xru\NICU project\Datasæt';

data out.nicu_outcomes;
    set data6;
run;





















